In-class Exercise Week 5

Author

Linsen Lu

pacman::p_load(igraph, tidygraph, ggraph, 
               visNetwork, lubridate, clock,
               tidyverse, graphlayouts, 
               concaveman, ggforce, jsonlite, dplyr)
# Read the knowledge-graph JSON file into an R list.
kg <- jsonlite::fromJSON(txt = "data/MC1_graph.json")

Inspect structure

# Show only the top-level components of the parsed list.
utils::str(object = kg, max.level = 1)
List of 5
 $ directed  : logi TRUE
 $ multigraph: logi TRUE
 $ graph     :List of 2
 $ nodes     :'data.frame': 17412 obs. of  10 variables:
 $ links     :'data.frame': 37857 obs. of  4 variables:

Extract and inspect

# Pull the node and link data frames out of the parsed list as tibbles.
nodes_tbl <- kg[["nodes"]] %>%
  as_tibble()

edges_tbl <- kg[["links"]] %>%
  as_tibble()

Initial EDA

# Horizontal bar chart: number of edges per `Edge Type`.
edges_tbl %>%
  ggplot(mapping = aes(y = `Edge Type`)) +
  geom_bar()

# Lookup table from node id to row position in `nodes_tbl`. tbl_graph()
# expects integer `from`/`to` columns that index rows of the node table.
id_map <- tibble(
  id = nodes_tbl$id,
  index = seq_len(nrow(nodes_tbl))
)

# Translate each edge's `source` and `target` ids into node row indices.
# The result must be written back to `edges_tbl` so that the `from`/`to`
# columns exist for the steps below.
edges_tbl <- edges_tbl %>%
  left_join(id_map, by = c("source" = "id")) %>%
  rename(from = index) %>%
  left_join(id_map, by = c("target" = "id")) %>%
  rename(to = index)

# Keep only edges whose two endpoints were both matched to a node; an
# unmatched id leaves NA in `from` or `to` after the left joins.
edges_tbl <- edges_tbl %>%
  filter(!is.na(from), !is.na(to))

# Assemble the tidygraph object, keeping the directedness flag from the file.
graph <- tbl_graph(
  nodes = nodes_tbl,
  edges = edges_tbl,
  directed = kg$directed
)
# Seed the RNG before the layout is computed so the plot can be reproduced.
set.seed(1234)

# Draw the whole graph: grey translucent edges, nodes coloured by
# `Node Type`, and repelled text labels taken from the `name` column.
graph %>%
  ggraph(layout = "fr") +
  geom_edge_link(colour = "gray", alpha = 0.3) +
  geom_node_point(mapping = aes(color = `Node Type`), size = 4) +
  geom_node_text(mapping = aes(label = name), size = 2.5, repel = TRUE) +
  theme_void()
# Keep only the "MemberOf" edges. The condition is evaluated against the
# graph's own active edge table, so it does not rely on an external tibble
# staying row-aligned with the graph's edges.
graph_memberof <- graph %>%
  activate(edges) %>%
  filter(`Edge Type` == "MemberOf")
# Distinct node indices that appear as either endpoint of an edge in
# `graph_memberof`.
used_node_indices <- unique(
  unlist(
    graph_memberof %>%
      activate(edges) %>%
      as_tibble() %>%
      select(from, to)
  )
)